Load required packages

library(tidyverse)
library(phyloseq)
library(speedyseq)
library(plotly)
options(getClass.msg=FALSE) # https://github.com/epurdom/clusterExperiment/issues/66
#this fixes an error message that pops up because the class 'Annotated' is defined in two different packages

Load functions from Github

'%!in%' <- function(x,y)!('%in%'(x,y))

source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_taxa_tests.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_normalisation.R")
## Loading required package: scales
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
## Loading required package: reshape2
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_alpha.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_beta.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_heatmap.R")

Load physeq object

ps = "data/processed/physeq_update_11_1_21.RDS"

ps %>% 
  here::here() %>%
  readRDS() %>%
  phyloseq_get_strains_fast() %>%
  phyloseq_remove_chloro_mitho() -> physeq
## Joining, by = "ASV"
physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 346 taxa and 384 samples ]:
## sample_data() Sample Data:        [ 384 samples by 50 sample variables ]:
## tax_table()   Taxonomy Table:     [ 346 taxa by 8 taxonomic ranks ]:
## phy_tree()    Phylogenetic Tree:  [ 346 tips and 344 internal nodes ]:
## refseq()      DNAStringSet:       [ 346 reference sequences ]
## taxa are rows
"data/raw/hplc Fermentation (Salvato automaticamente).xlsx" %>%
      readxl::read_xlsx(sheet = "All total") -> metabolites


metabolites %>% 
  glimpse()
## Rows: 575
## Columns: 14
## $ Sample_Id     <chr> "CR-10", "CR-11", "CR-12", "CR-13", "CR-14", "CR-15", "…
## $ Lactose_mM    <dbl> 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,…
## $ Glucose_mM    <dbl> 0.000, 0.000, 0.000, 0.000, 0.000, 0.947, 0.000, 0.000,…
## $ Galactose_mM  <dbl> 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 1.595, 1.759,…
## $ Succinat_mM   <dbl> 3.383, 2.586, 2.197, 0.751, 0.748, 1.969, 2.798, 4.295,…
## $ Lactat_mM     <dbl> 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 6.491, 8.223,…
## $ Formiat_mM    <dbl> 2.388, 2.904, 2.696, 3.557, 2.510, 3.179, 4.719, 5.614,…
## $ Acetat_mM     <dbl> 69.822, 67.802, 68.825, 70.645, 68.102, 49.775, 45.373,…
## $ Propionat_mM  <dbl> 14.232, 14.141, 12.500, 13.205, 13.395, 11.560, 9.765, …
## $ Isobutyrat_mM <dbl> 6.565, 6.195, 6.488, 6.865, 7.119, 5.270, 4.854, 0.000,…
## $ Butyrat_mM    <dbl> 34.057, 31.397, 36.942, 38.033, 37.673, 22.600, 14.158,…
## $ Isovalerat_mM <dbl> 6.210, 5.467, 5.985, 6.673, 6.656, 4.267, 2.710, 1.417,…
## $ Valerat_mM    <dbl> 8.270, 7.617, 7.556, 7.906, 7.466, 3.218, 1.384, 0.365,…
## $ Total_SCFA_mM <dbl> 144.927, 138.109, 143.189, 147.635, 143.669, 101.838, 9…
metabolites %>% 
  DT::datatable()
physeq@sam_data %>%
  data.frame() %>%
  rownames_to_column('id') %>%
  left_join(
    metabolites,
    by = c("Sample_description" = "Sample_Id")) %>%
  column_to_rownames('id') %>% 
  sample_data() -> physeq@sam_data
physeq %>%
  sample_data() %>%
  data.frame() %>%
  write_tsv(paste0(here::here(),
                    "/data/processed/",
       "sample_data_physeq_update_11_1_21",
       format(Sys.time(), "%Y%b%d"),".tsv"))
physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 346 taxa and 384 samples ]:
## sample_data() Sample Data:        [ 384 samples by 63 sample variables ]:
## tax_table()   Taxonomy Table:     [ 346 taxa by 8 taxonomic ranks ]:
## phy_tree()    Phylogenetic Tree:  [ 346 tips and 344 internal nodes ]:
## refseq()      DNAStringSet:       [ 346 reference sequences ]
## taxa are rows

We will be analyzing only the PolyFermS samples here so take a subset of the physeq object.

physeq %>% 
  subset_samples(Experiment == "Continuous") %>% 
  subset_samples(Paul %!in% c("Paul")) %>%
  subset_samples(Reactor != "IR2") -> ps_polyFermS

sample_data(ps_polyFermS)$Reactor <- fct_relevel(sample_data(ps_polyFermS)$Reactor, "IR1", "CR", "TR1", "TR2","TR3", "TR4", "TR5", "TR6") 

sample_data(ps_polyFermS)$Treatment <- fct_relevel(sample_data(ps_polyFermS)$Treatment, "UNTREATED",  "CTX+HV292.1", "CTX","HV292.1","VAN+CCUG59168", "VAN",  "CCUG59168") 

sample_data(ps_polyFermS)$Reactor_Treatment <- fct_relevel(sample_data(ps_polyFermS)$Reactor_Treatment, "IR1_UNTREATED","CR_UNTREATED", "CR_CTX", "CR_VAN", "TR1_CTX+HV292.1","TR2_CTX", "TR3_HV292.1", "TR5_VAN+CCUG59168", "TR4_VAN", "TR6_CCUG59168") 

ps_polyFermS
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 346 taxa and 242 samples ]:
## sample_data() Sample Data:        [ 242 samples by 63 sample variables ]:
## tax_table()   Taxonomy Table:     [ 346 taxa by 8 taxonomic ranks ]:
## phy_tree()    Phylogenetic Tree:  [ 346 tips and 344 internal nodes ]:
## refseq()      DNAStringSet:       [ 346 reference sequences ]
## taxa are rows
ps_polyFermS %>%
  sample_data() %>%
  data.frame() -> df
measures = df %>% select(ends_with("mM")) %>% colnames()

# define a function to plot scatter plot
my_fn <- function(data, mapping, ...){
  p <- ggplot(data = data, mapping = mapping) +
    geom_point() +
    geom_smooth(method=lm, ...)
  p
}


df %>%
  GGally::ggpairs(columns = measures,
                  ggplot2::aes(colour = Reactor),
                  # legend = 1,
                  progress = FALSE,
                  upper = list(
                    continuous = GGally::wrap('cor', method = "pearson")
                  ),
                  lower = list(continuous = my_fn)) -> p_corr

p_corr

df %>%
  plot_alphas(measure = measures,
              x_group = "Reactor_Treatment",
              colour_group = "Enrichment",
              fill_group = "Enrichment",
              shape_group = "Enrichment",
              facet_group = "Reactor_Treatment",
              test_group = "Reactor_Treatment",
              test_group_2 = "Enrichment") -> out
plot_alpha_time <- function(df, 
                            x = "Day_from_Inoculum", 
                            y = "value", 
                            shape = "neg",
                            fill = "Reactor_Treatment",
                            group = "Reactor_Treatment", 
                            facet)
{
  df %>%
  arrange(Day_from_Inoculum) %>%
  ggplot(aes_string(x = x,
             y = y, shape = shape)) +
  geom_point(size=2, alpha=0.9, aes_string(group = group, color = fill, fill = fill),  show.legend = FALSE) + 
  geom_path(inherit.aes = TRUE, aes_string(group=group),
            size = 0.08,
            linetype = "dashed") +
  facet_grid(as.formula(facet), scales = "free") +
  theme_light() +
  scale_color_viridis_d(na.value = "black") + 
  geom_vline(xintercept = c(23,39), 
             color="black", alpha=0.4) + 
  # geom_smooth(show.legend = TRUE, level = 0.95) + 
  scale_x_continuous(breaks=seq(0,90,10)) -> plot

  return(plot)
}
out$plot$data %>%
  dplyr::filter(alphadiversiy == "Total_SCFA_mM") %>%
  dplyr::mutate(neg = ifelse(value == 0, "neg", "pos")) %>%
  arrange(Day_from_Inoculum) %>%
  ggplot(aes_string(x = "Day_from_Inoculum",
                    y = "value", group = "Reactor_Treatment")) +
  geom_jitter(size=0.5, alpha=0.9, aes_string(group = "Reactor_Treatment", color = "Reactor_Treatment", fill = "Reactor_Treatment"),  show.legend = TRUE) + 
  geom_path(inherit.aes = TRUE, aes_string(group="Reactor_Treatment", fill = "Reactor_Treatment", color = "Reactor_Treatment", show.legend = FALSE),
            size = 0.001,
            linetype = "dashed") +
  # facet_grid(as.formula(facet), scales = "free") +
  geom_vline(xintercept = c(23,39), 
             color="black", alpha=0.4) + 
  geom_smooth(show.legend = FALSE, level = 0.95, alpha=0.05, size = 0.5 ,aes_string(group="Reactor_Treatment", color = "Reactor_Treatment", fill = "Reactor_Treatment")) +
  scale_x_continuous(breaks=seq(0,90,10)) +
  # scale_y_continuous(labels = scientific,
  #                    limits=c(1e+10, 1e+11), breaks = seq(1e+10, 1e+11, by = 1e+10),
  #                    trans = "log10") +
  labs(x="Day (from Inoculum)", y= "SCFA concentration [mM]",  
       col=NULL, fill = NULL, shape = NULL) +
  theme_light() +
  scale_color_viridis_d(na.value = "black") +
  scale_fill_viridis_d(na.value = "black") -> plot
## Warning: Ignoring unknown aesthetics: fill, show.legend
plot + theme(legend.position = "bottom")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot %>%
  plotly::ggplotly()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_time <- function(df, 
                      measure,
                      x = "Day_from_Inoculum", 
                      y = "value", 
                      shape = "neg",
                      fill = "Reactor_Treatment",
                      group = "Reactor_Treatment", 
                      facet)
{
  df %>%
  dplyr::filter(alphadiversiy %in% measure) %>%
  dplyr::mutate(alphadiversiy = fct_reorder(alphadiversiy, value, .desc = TRUE)) %>%
  dplyr::mutate(neg = ifelse(value == 0, "neg", "pos")) %>%
  arrange(Day_from_Inoculum) %>%
  ggplot(aes_string(x = x,
                    y = y)) +
  geom_jitter(size=0.5, alpha=0.9, aes_string(color = fill, fill = fill, shape = shape),  show.legend = TRUE) + 
  geom_path(inherit.aes = TRUE, aes_string(fill = fill, color = fill, show.legend = FALSE),
            size = 0.005,
            linetype = "dashed") +
  facet_grid(as.formula(facet), scales = "free") +
  geom_vline(xintercept = c(23,39), 
             color="black", alpha=0.4) + 
  geom_smooth(show.legend = FALSE, level = 0.95, alpha=0.05, size = 0.5 ,aes_string(color = fill, fill = fill)) +
  scale_x_continuous(breaks=seq(0,90,10)) +
  # scale_y_continuous(labels = scientific,
  #                    limits=c(1e+10, 1e+11), breaks = seq(1e+10, 1e+11, by = 1e+10),
  #                    trans = "log10") +
  theme_light() +
  scale_color_viridis_d(na.value = "black") +
  scale_fill_viridis_d(na.value = "black") -> plot

  return(plot + theme(legend.position = "bottom"))
}
out$plot$data %>%
  plot_time(measure = c("Total_SCFA_mM", "Acetat_mM", "Butyrat_mM", "Propionat_mM", "Isobutyrat_mM", "Valerat_mM", "Isovalerat_mM", "Succinat_mM"),
            facet = c("alphadiversiy ~ ."),  shape = NULL) + 
  labs(x="Day (from Inoculum)", y= "SCFA concentration [mM]",  
       col=NULL, fill = NULL, shape = NULL) + 
  scale_shape_manual(values=c(4, 19)) -> p4

p4

p4 %>% ggplotly()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
htmlwidgets::saveWidget(as_widget(p4 %>% ggplotly()), 
  paste0(here::here(),
                    "/data/processed/",
       "metabolites_",
       format(Sys.time(), "%Y%b%d"),".html"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# htmlwidgets::saveWidget(as_widget(p4 %>% ggplotly()), paste0("~/Documents/index.html"))
p4 + 
  facet_null() +
  facet_grid(alphadiversiy ~ Reactor_Treatment, scales = "free") +
  scale_color_manual(values = rep("black",8)) +
  scale_fill_manual(values = rep("black",8)) 

p4 + 
  facet_null() +
  facet_grid(Reactor_Treatment  ~ alphadiversiy, scales = "free") +
  scale_color_manual(values = rep("black",8)) +
  scale_fill_manual(values = rep("black",8)) + 
  scale_x_continuous(breaks=seq(0,90,20))

df %>%
  dplyr::select(ends_with("mM") | "Total_SCFA_mM") %>%
  drop_na() %>%
  # t() %>%
  scale(center = TRUE, 
        scale = TRUE) %>%
  dist(method= "euc") -> euc_met

plot_ordination(ps_polyFermS,
                ordination = phyloseq::ordinate(ps_polyFermS,
                                      distance = euc_met, 
                                      method = "PCoA")) -> pca

pca$layers[[1]] = NULL

pca +
  geom_point(size=2,
                   aes(color = Reactor_Treatment, 
                       fill = NULL,
                       shape = NULL,
                       alpha = Day_from_Inoculum)) + 
  theme_light() +
  geom_path(arrow = arrow(type = "open", angle = 30, length = unit(0.15, "inches")),
              size = 0.08, linetype = "dashed", inherit.aes = TRUE, aes(group=Reactor_Treatment, color = Reactor_Treatment)) +
  scale_alpha_continuous(range=c( 0.9, 0.3)) + 
  scale_color_viridis_d(na.value = "red") + 
  scale_fill_viridis_d(na.value = "red") + 
  scale_shape_manual(values = c(8, 21, 22, 23, 24, 16, 15, 18, 17)) + 
  theme_classic() -> p5

p5

p5 %>%
  plotly::ggplotly() -> p5ly

p5ly
htmlwidgets::saveWidget(as_widget(p5ly), 
  paste0(here::here(),
                    "/data/processed/",
       "metabolites_",
       format(Sys.time(), "%Y%b%d"),"_2.html"))
paste0(here::here(),
       "/data/processed/",
       "metabolites",
       "_",
       format(Sys.time(), "%Y%b%d")
       ,".RData") %>% save.image()
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] reshape2_1.4.4       scales_1.1.1         plotly_4.9.3        
##  [4] speedyseq_0.5.3.9001 phyloseq_1.32.0      forcats_0.5.0       
##  [7] stringr_1.4.0        dplyr_1.0.3          purrr_0.3.4         
## [10] readr_1.4.0          tidyr_1.1.2          tibble_3.0.5        
## [13] ggplot2_3.3.3        tidyverse_1.3.0.9000
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-0    ggsignif_0.6.0      ellipsis_0.3.1     
##   [4] rio_0.5.16          rprojroot_2.0.2     XVector_0.28.0     
##   [7] fs_1.5.0            rstudioapi_0.13     ggpubr_0.4.0       
##  [10] farver_2.0.3        DT_0.17             fansi_0.4.2        
##  [13] lubridate_1.7.9.2   xml2_1.3.2          codetools_0.2-18   
##  [16] splines_4.0.3       knitr_1.30          ade4_1.7-16        
##  [19] jsonlite_1.7.2      broom_0.7.3         cluster_2.1.0      
##  [22] dbplyr_2.0.0        compiler_4.0.3      httr_1.4.2         
##  [25] backports_1.2.1     assertthat_0.2.1    Matrix_1.3-2       
##  [28] lazyeval_0.2.2      cli_2.2.0           htmltools_0.5.1    
##  [31] prettyunits_1.1.1   tools_4.0.3         igraph_1.2.6       
##  [34] gtable_0.3.0        glue_1.4.2          Rcpp_1.0.6         
##  [37] carData_3.0-4       Biobase_2.48.0      cellranger_1.1.0   
##  [40] vctrs_0.3.6         Biostrings_2.56.0   multtest_2.44.0    
##  [43] ape_5.4-1           nlme_3.1-151        iterators_1.0.13   
##  [46] crosstalk_1.1.1     xfun_0.20           openxlsx_4.2.3     
##  [49] rvest_0.3.6         lifecycle_0.2.0     rstatix_0.6.0      
##  [52] zlibbioc_1.34.0     MASS_7.3-53         hms_0.5.3          
##  [55] parallel_4.0.3      biomformat_1.16.0   rhdf5_2.32.2       
##  [58] RColorBrewer_1.1-2  curl_4.3            yaml_2.2.1         
##  [61] reshape_0.8.8       stringi_1.5.3       S4Vectors_0.26.1   
##  [64] foreach_1.5.1       permute_0.9-5       BiocGenerics_0.34.0
##  [67] zip_2.1.1           rlang_0.4.10        pkgconfig_2.0.3    
##  [70] evaluate_0.14       lattice_0.20-41     Rhdf5lib_1.10.0    
##  [73] htmlwidgets_1.5.3   labeling_0.4.2      tidyselect_1.1.0   
##  [76] here_1.0.1          GGally_2.1.0        plyr_1.8.6         
##  [79] magrittr_2.0.1      R6_2.5.0            IRanges_2.22.2     
##  [82] generics_0.1.0      DBI_1.1.0           foreign_0.8-81     
##  [85] pillar_1.4.7        haven_2.3.1         withr_2.4.0        
##  [88] mgcv_1.8-33         abind_1.4-5         survival_3.2-7     
##  [91] modelr_0.1.8        crayon_1.3.4        car_3.0-10         
##  [94] utf8_1.1.4          rmarkdown_2.6       progress_1.2.2     
##  [97] grid_4.0.3          readxl_1.3.1        data.table_1.13.6  
## [100] vegan_2.5-7         reprex_0.3.0        digest_0.6.27      
## [103] stats4_4.0.3        munsell_0.5.0       viridisLite_0.3.0